From 87b3b5b34a4d827047b612c7ef9647b07bd13114 Mon Sep 17 00:00:00 2001 From: "djm@kirby.fc.hp.com" Date: Wed, 12 Oct 2005 10:56:14 -0600 Subject: [PATCH] Fast hyperprivop for itc.i and itc.d (works, but default off for stability) --- xen/arch/ia64/asm-offsets.c | 3 + xen/arch/ia64/xen/hyperprivop.S | 162 +++++++++++++++++++++++++++++++- 2 files changed, 161 insertions(+), 4 deletions(-) diff --git a/xen/arch/ia64/asm-offsets.c b/xen/arch/ia64/asm-offsets.c index d1a6893fdd..ed770ee725 100644 --- a/xen/arch/ia64/asm-offsets.c +++ b/xen/arch/ia64/asm-offsets.c @@ -82,10 +82,13 @@ void foo(void) DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct vcpu, arch._thread.ksp)); DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack)); + DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain)); DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0)); DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0)); DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm)); DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva)); + DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, arch.dtlb_pte)); + DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, arch.itlb_pte)); DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0])); DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3])); DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3])); diff --git a/xen/arch/ia64/xen/hyperprivop.S b/xen/arch/ia64/xen/hyperprivop.S index cddfba7f69..e20b5d91e3 100644 --- a/xen/arch/ia64/xen/hyperprivop.S +++ b/xen/arch/ia64/xen/hyperprivop.S @@ -14,6 +14,10 @@ #include #include +#define _PAGE_PPN_MASK 0x0003fffffffff000 //asm/pgtable.h doesn't do assembly +#define PAGE_PHYS 0x0010000000000761 //__pgprot(__DIRTY_BITS|_PAGE_PL_2|_PAGE_AR_RWX) +#define _PAGE_PL_2 (2<<7) + #if 1 // change to 0 to turn off all fast paths #define FAST_HYPERPRIVOPS #define FAST_HYPERPRIVOP_CNT @@ -24,6 +28,7 @@ #define FAST_RFI #define FAST_SSM_I #define FAST_PTC_GA +#undef FAST_ITC // working but default off for now #undef RFI_TO_INTERRUPT // not working yet #endif @@ -1663,10 +1668,159 @@ ENTRY(hyper_ptc_ga) ;; END(hyper_ptc_ga) +// Registers at entry +// r17 = break immediate (XEN_HYPER_ITC_D or I) +// r18 == XSI_PSR_IC_OFS +// r31 == pr +GLOBAL_ENTRY(hyper_itc) +ENTRY(hyper_itc_i) + // fall through, hyper_itc_d handles both i and d ENTRY(hyper_itc_d) +#ifndef FAST_ITC br.spnt.many dispatch_break_fault ;; -END(hyper_itc_d) +#endif + adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r23=[r23];; + extr.u r24=r23,2,6;; // r24==logps + cmp.gt p7,p0=PAGE_SHIFT,r24 +(p7) br.spnt.many dispatch_break_fault ;; + // translate_domain_pte(r8=pteval,PSCB(ifa)=address,r24=itir) + mov r19=1;; + shl r20=r19,r24;; + adds r20=-1,r20;; // r20 == mask + movl r19=_PAGE_PPN_MASK;; + and r22=r8,r19;; // r22 == pteval & _PAGE_PPN_MASK + andcm r19=r22,r20;; + adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r21=[r21];; + and r20=r21,r20;; + or r19=r19,r20;; // r19 == mpaddr + movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r27=[r27];; + adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;; + ld8 r27=[r27];; +// FIXME: is the global var dom0 always pinned? assume so for now + movl r28=dom0;; + ld8 r28=[r28];; +// FIXME: for now, only handle dom0 (see lookup_domain_mpa below) + cmp.ne p7,p0=r27,r28 +(p7) br.spnt.many dispatch_break_fault ;; + // if region 6, go slow way +#ifdef FAST_HYPERPRIVOP_CNT + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;; +(p6) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);; +(p7) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif +// FIXME: for now, just do domain0 and skip mpaddr range checks + dep r20=r0,r19,0,PAGE_SHIFT + movl r21=PAGE_PHYS ;; + or r20=r20,r21 ;; // r20==return value from lookup_domain_mpa + // r8=pteval,r20=pteval2 + movl r19=_PAGE_PPN_MASK + movl r21=_PAGE_PL_2;; + andcm r25=r8,r19;; // r25==pteval & ~_PAGE_PPN_MASK + and r22=r20,r19;; + or r22=r22,r21;; + or r22=r22,r25;; // r22==return value from translate_domain_pte + // done with translate_domain_pte + // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r8=mppte,r24=logps) +// FIXME: for now, just domain0 and skip range check + // psr.ic already cleared + // NOTE: r24 still contains ps (from above) + shladd r24=r24,2,r0;; + mov cr.itir=r24;; + adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r23=[r23];; + mov cr.ifa=r23;; + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;; +(p6) itc.d r22;; +(p7) itc.i r22;; + dv_serialize_data + // FIXME: how do I make assembler warnings go away here? + // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2) + thash r28=r23 + or r26=1,r22;; + ttag r21=r23 + adds r25=8,r28 + mov r19=r28;; + st8 [r25]=r24 + adds r20=16,r28;; + st8 [r19]=r26 + st8 [r20]=r21;; + // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa) + // TR_ENTRY = {page_flags,itir,addr,rid} + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17 + movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r27=[r27];; + adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27 +(p6) adds r27=IA64_VCPU_DTLB_OFFSET,r27 +(p7) adds r27=IA64_VCPU_ITLB_OFFSET,r27;; + st8 [r27]=r22,8;; // page_flags: already has pl >= 2 and p==1 + st8 [r27]=r24,8;; // itir + mov r19=-4096;; + and r23=r23,r19;; + st8 [r27]=r23,8;; // ifa & ~0xfff +// ?? is virtualize_rid(v,get_rr(ifa))==vcpu_get_rr(ifa)?? YES!! + adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 + extr.u r25=r23,61,3;; + shladd r29=r25,3,r29;; + ld8 r29=[r29];; + movl r20=0xffffff00;; + and r29=r29,r20;; + st8 [r27]=r29,-8;; // rid + //if ps > 12 + cmp.eq p7,p0=12<<2,r24 +(p7) br.cond.sptk.many 1f;; + // if (ps > 12) { + // trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<